	* Generate Census population estimates, merge with VS mortality data and create mortality rates
	* March 24, 2016
	* Hannes Schwandt, hannes.schwandt@uzh.ch

	
	*********************************************************************
	*----------------------- Define Directories ------------------------*
	*********************************************************************

	* Root folders used by every section below. File names are appended
	* directly to these values, so each one has to end with a slash.
	*   data_orig    : raw Census extracts (county_population subfolders)
	*   data_cleaned : intermediate and final .dta files
	*   VS_orig      : raw Vital Statistics mortality files
	global data_orig "/original/"
	global data_cleaned "/data/"
	global VS_orig "/VSmortality/"

	**************************************************************************************************
	**************************************************************************************************
	*--------------------- Create Age-Specific Mortality Rates by County Groups ---------------------*
	**************************************************************************************************
	**************************************************************************************************
	
	
	
	**************************************************************************************************
	*----------------------- Census Population Counts, County X Gender X Age ------------------------*
	*************************** Raw files from http://socialexplorer.com/ ****************************
	**************************************************************************************************
	
{
	*********************
	** Population 1990 **
	*********************
	* Reads the 1990 county extract (age group x race x sex cells), keeps the
	* sex-specific age cells, sums them and saves population counts by
	* county x male x agegroup19 with year=1990.
	* Variable names are written out in full so the section also runs with
	* varabbrev switched off.

	infile using "${data_orig}county_population/census1990/R10985046.dct", using("${data_orig}county_population/census1990/R10985046_SL050_agegroupXraceXsex.txt") clear

	destring FIPS, gen(county)
	destring STATE, gen(state)
	rename QName geo_qname

	* strip the zero padding from the cell number so it can serve as reshape j()
	forvalues x=1/9 {
		rename P012_00`x' P012_`x'
	}
	forvalues x=10/99 {
		rename P012_0`x' P012_`x'
	}

	drop NAME SUMLEV GEOCOMP REGION DIVISION FIPS STATE COUNTY

	reshape long P012_, i(county) j(sexagerace)
	rename P012_ population

	* assign gender and age group across the different race blocks
	gen male=.
	gen agegroup31=.

	* The cells repeat in five strides of 65 (presumably one per race group,
	* TODO confirm against the .dct file): 31 male age cells start at 4+65*x
	* and 31 female age cells start at 36+65*x.
	forvalues x=0/4 {
		local ymale=4+65*`x'
		local zmale=`ymale'+30
		display as text "`ymale' to `zmale'"
		replace male=1 if inrange(sexagerace,`ymale',`zmale')
		replace agegroup31=sexagerace-`ymale' if inrange(sexagerace,`ymale',`zmale')

		local yfemale=36+65*`x'
		local zfemale=`yfemale'+30
		display as text "`yfemale' to `zfemale'"
		replace male=0 if inrange(sexagerace,`yfemale',`zfemale')
		replace agegroup31=sexagerace-`yfemale' if inrange(sexagerace,`yfemale',`zfemale')
	}

	* totals and subtotals are the cells that received no sex or age group
	drop if male==.
	drop if agegroup31==.

	* sum over the five strides
	collapse (sum) population, by(county agegroup31 male)

	cap label define agegroup31 0 "Under 1 year" 1 "1 and 2 years" 2 "3 and 4 years" ///
		3 "5 years" 4 "6 years" 5 "7 to 9 years" 6 "10 and 11 years" ///
		7 "12 and 13 years" 8 "14 years" 9 "15 years" 10 "16 years" ///
		11 "17 years" 12 "18 years" 13 "19 years" 14 "20 years" 15 "21 years" ///
		16 "22 to 24 years" 17 "25 to 29 years" 18 "30 to 34 years" ///
		19 "35 to 39 years" 20 "40 to 44 years" 21 "45 to 49 years" ///
		22 "50 to 54 years" 23 "55 to 59 years" 24 "60 and 61 years" ///
		25 "62 to 64 years" 26 "65 to 69 years" 27 "70 to 74 years" ///
		28 "75 to 79 years" 29 "80 to 84 years" 30 "85 years and over"
	label values agegroup31 agegroup31

	* 31 census age groups to the 19 bins used for the mortality data
	* (bin codes: 0, 2 for ages 1-4, then lower bound + 2, 90 for 85 and over)
	gen agegroup19=.
	replace agegroup19= 0 if agegroup31==0
	replace agegroup19= 2 if inrange(agegroup31, 1, 2)
	replace agegroup19= 7 if inrange(agegroup31, 3, 5)
	replace agegroup19=12 if inrange(agegroup31, 6, 8)
	replace agegroup19=17 if inrange(agegroup31, 9,13)
	replace agegroup19=22 if inrange(agegroup31,14,16)
	replace agegroup19=27 if agegroup31==17
	replace agegroup19=32 if agegroup31==18
	replace agegroup19=37 if agegroup31==19
	replace agegroup19=42 if agegroup31==20
	replace agegroup19=47 if agegroup31==21
	replace agegroup19=52 if agegroup31==22
	replace agegroup19=57 if agegroup31==23
	replace agegroup19=62 if inrange(agegroup31,24,25)
	replace agegroup19=67 if agegroup31==26
	replace agegroup19=72 if agegroup31==27
	replace agegroup19=77 if agegroup31==28
	replace agegroup19=82 if agegroup31==29
	replace agegroup19=90 if agegroup31==30

	collapse (sum) population, by(county agegroup19 male)

	order county agegroup19 male

	gen year=1990

	save "${data_cleaned}countypop1990_19agebins", replace


	*********************
	** Population 2000 **
	*********************
	* Reads the 2000 county extract (sex x age group cells), builds 38 age
	* groups from two tables, maps them to the 19 bins and saves population
	* counts by county x male x agegroup19 with year=2000.
	infile using "${data_orig}county_population/census2000/R10985040.dct", using("${data_orig}county_population/census2000/R10985040_SL050_sexXagegroup.txt") clear

	destring FIPS, gen(county)
	destring STATE, gen(state)
	rename QName geo_qname

	* P012 cells 1-9: drop one zero so the reshape stub is P0120
	forvalues x=1/9 {
		rename P01200`x' P0120`x'
	}

	* P014 cells are moved under the same stub, offset by 100 (cells 101-143)
	forvalues x=1/9 {
		rename P01400`x' P012010`x'
	}
	forvalues x=10/43 {
		rename P0140`x' P01201`x'
	}

	drop NAME SUMLEV GEOCOMP REGION DIVISION FIPS STATE COUNTY AREA*

	reshape long P0120, i(county) j(sexage)
	rename P0120 population

	* Cells 103-122 (male) and 124-143 (female) carry single years 0-19;
	* cells 8-25 (male) and 32-49 (female) carry the groups from age 20 upward.
	gen male=.
	gen agegroup38=.

	replace male=1 if inrange(sexage,8,25) | inrange(sexage,103,122)
	replace male=0 if inrange(sexage,32,49) | inrange(sexage,124,143)

	* everything else is a total or a coarser age cell
	drop if male==.

	replace agegroup38=sexage-103 if inrange(sexage,103,122)
	replace agegroup38=sexage-124 if inrange(sexage,124,143)

	replace agegroup38=sexage-8  +20 if inrange(sexage,8,25)
	replace agegroup38=sexage-32 +20 if inrange(sexage,32,49)

	cap label define agegroup38 0 "Under 1 year" 1 "1 year" 2 "2 years" 3 "3 years" ///
		4 "4 years" 5 "5 years" 6 "6 years" 7 "7 years" 8 "8 years" 9 "9 years" ///
		10 "10 years" 11 "11 years" 12 "12 years" 13 "13 years" 14 "14 years" ///
		15 "15 years" 16 "16 years" 17 "17 years" 18 "18 years" 19 "19 years" ///
		20 "20 years" 21 "21 years" 22 "22 to 24 years" 23 "25 to 29 years" ///
		24 "30 to 34 years" 25 "35 to 39 years" 26 "40 to 44 years" ///
		27 "45 to 49 years" 28 "50 to 54 years" 29 "55 to 59 years" ///
		30 "60 and 61 years" 31 "62 to 64 years" 32 "65 and 66 years" ///
		33 "67 to 69 years" 34 "70 to 74 years" 35 "75 to 79 years" ///
		36 "80 to 84 years" 37 "85 years and over"
	label values agegroup38 agegroup38

	* 38 age groups to the 19 bins used for the mortality data
	gen agegroup19=.
	replace agegroup19= 0 if agegroup38==0
	replace agegroup19= 2 if inrange(agegroup38, 1, 4)
	replace agegroup19= 7 if inrange(agegroup38, 5, 9)
	replace agegroup19=12 if inrange(agegroup38,10,14)
	replace agegroup19=17 if inrange(agegroup38,15,19)
	replace agegroup19=22 if inrange(agegroup38,20,22)
	replace agegroup19=27 if agegroup38==23
	replace agegroup19=32 if agegroup38==24
	replace agegroup19=37 if agegroup38==25
	replace agegroup19=42 if agegroup38==26
	replace agegroup19=47 if agegroup38==27
	replace agegroup19=52 if agegroup38==28
	replace agegroup19=57 if agegroup38==29
	replace agegroup19=62 if inrange(agegroup38,30,31)
	replace agegroup19=67 if inrange(agegroup38,32,33)
	replace agegroup19=72 if agegroup38==34
	replace agegroup19=77 if agegroup38==35
	replace agegroup19=82 if agegroup38==36
	replace agegroup19=90 if agegroup38==37

	* state is constant within county, so its mean just carries it along
	collapse (mean) state (sum) population, by(male agegroup19 county)

	gen year=2000

	save "${data_cleaned}countypop2000_19agebins", replace
	
	
	*********************
	** Population 2010 **
	*********************
	* Reads the 2010 county extract (age x sex cells), maps the 103 age cells
	* per sex to the 19 bins and saves population counts by
	* county x male x agegroup19 with year=2010.

	infile using "${data_orig}county_population/census2010/R10985042.dct", using("${data_orig}county_population/census2010/R10985042_SL050_ageXsex.txt") clear

	destring FIPS, gen(county)
	destring STATE, gen(state)
	rename QName geo_qname

	drop NAME SUMLEV GEOCOMP REGION DIVISION FIPS STATE COUNTY AREA*

	* strip the zero padding from the cell number so the reshape stub is PCT0120
	forvalues x=1/9 {
		rename PCT012000`x' PCT0120`x'
	}
	forvalues x=10/99 {
		rename PCT01200`x' PCT0120`x'
	}

	reshape long PCT0120, i(county) j(sexage)
	rename PCT0120 population

	* cells 3-105 are the male age cells, 107-209 the female age cells
	gen male=.
	replace male=1 if inrange(sexage,3,105)
	replace male=0 if inrange(sexage,107,209)

	* everything else is a total
	drop if male==.

	gen agegroup103=.
	replace agegroup103=sexage-3 if inrange(sexage,3,105)
	replace agegroup103=sexage-107 if inrange(sexage,107,209)

	* Value labels: single years 1-99 are generated in a loop. Code 102 is the
	* cell after 105-109 years, i.e. 110 years and over (it was previously
	* mislabelled as 100 and over).
	capture label drop agegroup103
	label define agegroup103 0 "Under 1 year"
	forvalues a=1/99 {
		label define agegroup103 `a' "`a' years", add
	}
	label define agegroup103 100 "100 to 104 years" 101 "105-109 years" 102 "110 years and over", add
	label values agegroup103 agegroup103

	* 103 age cells to the 19 bins used for the mortality data
	gen agegroup19=.
	replace agegroup19=0 if agegroup103==0
	replace agegroup19=2 if inrange(agegroup103,1,4)
	* five-year groups 5-9 up to 80-84 get code lower bound + 2
	forvalues lo=5(5)80 {
		replace agegroup19=`lo'+2 if inrange(agegroup103,`lo',`lo'+4)
	}
	replace agegroup19=90 if inrange(agegroup103,85,110)

	collapse (sum) population, by(county agegroup19 male)

	order county agegroup19 male

	gen year=2010

	save "${data_cleaned}countypop2010_19agebins", replace
				
				
	**********************
	** Merge population **
	**********************
	* Stack the 1990, 2000 and 2010 county population files into one panel.

	use "${data_cleaned}countypop1990_19agebins", clear
	foreach yr in 2000 2010 {
		append using "${data_cleaned}countypop`yr'_19agebins"
	}

	* county codes above 72000 are removed (presumably territories, TODO confirm)
	drop if county>72000

	sort county year male
	save "${data_cleaned}countypop1990_2010", replace


}
	
	**************************************************************************************************
	*-------------------- Vital Statistics Death Counts, County X Gender X Age ----------------------*
	*********** Raw files from http://www.cdc.gov/nchs/data_access/vitalstatsonline.htm **************
	**************************************************************************************************
			
		*********************************************************************
		*------------------- Define Globals & Programs ---------------------*
		*********************************************************************
		{
		* Fixed-column read commands for the raw death records.
		* infix2002 is used for files up to 2002, infix2003 from 2003 onward;
		* in the later layout state and county of residence are read as strings.
		global infix2002 "infix month 55-56 ageunit 64 agenr 65-66 str sex 59 state_res 124-125 county_res 126-128"
		global infix2003 "infix month 65-66 ageunit 70 agenr 71-73 str sex 69 str state_res 29-30 str county_res 35-37"

		* Raw file location per year, stored in load_YYYY.
		* Years whose file names follow a regular pattern are set in loops.
		forvalues y=1990/1992 {
			global load_`y' "${VS_orig}Mort`y'/MORT`y'.PT2"
		}
		global load_1993 "${VS_orig}Mort1993/MULT1993.PAR"

		global load_2000 "${VS_orig}Mort2000/MORT00US.PT2"
		global load_2001 "${VS_orig}Mort2001/Mort01us.dat"
		global load_2002 "${VS_orig}Mort2002/Mort02us.dat"
		global load_2003 "${VS_orig}Mort2003/Mult03us.dat"

		global load_2010 "${VS_orig}Mort2010/MULT2010.USPART2.EXACTDOD"
		forvalues y=2011/2013 {
			global load_`y' "${VS_orig}Mort`y'/MULT`y'.USPART2"
		}

		*--------------------------- clean_VS ------------------------------*
		* clean_VS takes no arguments. It expects the raw death records in
		* memory (variables month ageunit agenr sex state_res county_res as
		* read by infix2002 or infix2003, plus a constant year variable) and
		* leaves one row per age x male x county x censusday holding the number
		* of deaths and the state code.

		capture program drop clean_VS
		program define clean_VS

			* The bare reference to year uses its value in the first
			* observation; it selects the record layout.
			if year<=2002 {
				* ageunit 0: agenr is the age; ageunit 1: agenr plus 100;
				* ageunit 2 to 8: set to age 0 (presumably sub-year units,
				* TODO confirm); any other unit leaves age missing
				gen age=cond(ageunit==0, agenr, cond(ageunit==1, agenr+100, cond(ageunit>=2 & ageunit<9, 0, .)))
				gen male=(sex=="1")  if sex!="."
				rename state_res state
			}

			if year>=2003 {
				* ageunit 1: agenr is the age; units 2 to 8: set to age 0
				gen age=cond(ageunit==1, agenr, cond(ageunit>1 & ageunit<9, 0, .))
				replace age=. if age==999
				gen male=(sex=="M")

				* translate the postal abbreviation into the numeric state
				* code; the two lists are matched by position
				local postal AL AK AZ AR CA CO CT DE DC FL GA HI ID IL IN IA KS KY LA ME MD MA MI MN MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA RI SC SD TN TX UT VT VA WA WV WI WY
				local fips 1 2 4 5 6 8 9 10 11 12 13 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56
				local nstates : word count `postal'
				gen state=.
				forvalues i=1/`nstates' {
					local ab : word `i' of `postal'
					local code : word `i' of `fips'
					quietly replace state=`code' if state_res=="`ab'"
				}

				destring county_res, replace
			}

			* five-digit county code: state code times 1000 plus county code
			gen county=state*1000+county_res

			* report and remove records whose state code is 0, above 56 or missing
			display as text "foreign residents"
			count if state==0 | state>56
			drop if state==0 | state>56

			* 1 for deaths from April onward, 0 for January to March
			gen censusday=(month>=4)

			* one record is one death
			gen deaths=1

			collapse (mean) state (sum) deaths, by(age male county censusday)

			cap label define lab_state 001 "Alabama" 002 "Alaska" 004 "Arizona" 005 "Arkansas" ///
				006 "California" 008 "Colorado" 009 "Connecticut" 010 "Delaware" ///
				011 "District of Columbia" 012 "Florida" 013 "Georgia" 015 "Hawaii" ///
				016 "Idaho" 017 "Illinois" 018 "Indiana" 019 "Iowa" 020 "Kansas" ///
				021 "Kentucky" 022 "Louisiana" 023 "Maine" 024 "Maryland" ///
				025 "Massachusetts" 026 "Michigan" 027 "Minnesota" 028 "Mississippi" ///
				029 "Missouri" 030 "Montana" 031 "Nebraska" 032 "Nevada" ///
				033 "New Hampshire" 034 "New Jersey" 035 "New Mexico" 036 "New York" ///
				037 "North Carolina" 038 "North Dakota" 039 "Ohio" 040 "Oklahoma" ///
				041 "Oregon" 042 "Pennsylvania" 044 "Rhode Island" 045 "South Carolina" ///
				046 "South Dakota" 047 "Tennessee" 048 "Texas" 049 "Utah" 050 "Vermont" ///
				051 "Virginia" 053 "Washington" 054 "West Virginia" 055 "Wisconsin" ///
				056 "Wyoming"
			label values state lab_state

		end // clean_VS
			}
			
	*********************************
	*------Load VS Mortality--------*
	*********************************
	{

	* Read the raw death files for the census year and the three following
	* years (1990-1993, 2000-2003, 2010-2013), clean each one with clean_VS
	* and save it as VS_mort_YYYY_county.
	* File paths are quoted so that folders containing blanks also work.
	forvalues y=1990(10)2010 {
		forvalues z=0/3 {
			local x=`y'+`z'
			display `x'

			* the record layout changes with the 2003 file
			if `x'<=2002 {
				${infix2002} using "${load_`x'}", clear
			}
			else {
				${infix2003} using "${load_`x'}", clear
			}

			* clean_VS reads year to choose the layout branch
			gen year=`x'
			clean_VS
			* the collapse inside clean_VS drops year, so add it again
			gen year=`x'
			save "${data_cleaned}VS_mort_`x'_county", replace
		}
	}
				
	*********************************
	*-------Merge Mortality---------*
	*********************************
	* Stacks the twelve yearly files, assigns every death to the census year
	* at the start of its three-year window and to the age bin the person was
	* in on census day, and saves death counts by
	* year x county x agegroup19 x male.

	*1990-1993
	use "${data_cleaned}VS_mort_1990_county", clear
	forvalues x=1991/1993 {
		append using "${data_cleaned}VS_mort_`x'_county"
	}

	*2000-2003 / 2010-2013
	forvalues y=2000(10)2010 {
		forvalues z=0/3 {
			local x=`y'+`z'
			display `x'
			append using "${data_cleaned}VS_mort_`x'_county"
		}
	}

	*drop if before census day in first year or after census day in last year
	drop if censusday==0 & inlist(year,1980,1990,2000,2010)
	drop if censusday==1 & inlist(year,1983,1993,2003,2013)
	* now year refers to the 12 months after April 1st
	replace year=year-1 if censusday==0

	* check: no year ending in 3 should be left
	tab year censusday

	*assign age in baseline year
	forvalues x=1/2 {
		replace age=age-`x' if inlist(year,198`x',199`x',200`x',201`x')
	}

	gen agegroup19=.
	replace agegroup19=0 if age==0
	replace agegroup19=2 if inrange(age,1,4)
	* five-year groups 5-9 up to 80-84 get code lower bound + 2
	forvalues lo=5(5)80 {
		replace agegroup19=`lo'+2 if inrange(age,`lo',`lo'+4)
	}
	* Open-ended top bin. The upper bound is 130, the same bound the life
	* expectancy section uses, so deaths above age 110 are no longer lost
	* while the population counts for 85 and over include everyone.
	replace agegroup19=90 if inrange(age,85,130)

	* drops deaths with missing age and, because their baseline age is
	* negative, those born in XXX1 and XXX2
	drop if agegroup19==.

	*assign death count to baseline year (same year list as the age shift above)
	forvalues x=1/2 {
		replace year=year-`x' if inlist(year,198`x',199`x',200`x',201`x')
	}

	collapse (sum) deaths, by(year county agegroup19 male)

	save "${data_cleaned}VS_mort_county", replace


	}
	***********
	* Merge deaths, pop, county characteristics
	***********
	{

	* Combine death counts with population counts and county characteristics
	* and save one row per year x county x male x agegroup19.
	* Paths are quoted and key variables are written out in full; merge m:1
	* sorts on its own, so no explicit sort is needed beforehand.

	use "${data_cleaned}VS_mort_county", clear
	merge 1:1 year county agegroup19 male using "${data_cleaned}countypop1990_2010", nogen

	*merge county povertyrates
	merge m:1 county year using "${data_cleaned}county_poverty.dta", keep(1 3) nogen

	*merge county median income
	merge m:1 county year using "${data_cleaned}county_income.dta", keep(1 3) nogen
	drop incMeanHH

	*Dorn's county adjustments (fold the three codes into the codes they are pooled with)
	replace county=12025 if county==12086
	replace county=51083 if county==51780
	replace county=8013 if county==8014
	drop if county>72000

	* Population-weighted means of the county characteristics; population and
	* deaths are summed without weights. Rows without a population count carry
	* a missing weight and drop out here.
	* NOTE(review): pov is left abbreviated because the full variable name
	* comes from county_poverty.dta, which is not visible in this file.
	collapse pov inc* (rawsum) population deaths [aw=population], by(year county male agegroup19)

	save "${data_cleaned}mort_county", replace
	}			
	
			******************************
			***** Generate quantiles for age groups *****
			******************************
		
			{
				*-----------------------------Generate quantiles-----------------------------*
				* Ranks counties within each census year by poverty, median
				* income, HS dropout share and life expectancy (population
				* weighted) and saves the quantile of every county-year in
				* the file temp.

				global quantiles 20

				use "${data_cleaned}countypop1990_2010", clear

				collapse (sum) population, by(county year)

				*merge county povertyrates
				merge m:1 county year using "${data_cleaned}county_poverty.dta", keep(1 3) nogen

				*merge county median income
				merge m:1 county year using "${data_cleaned}county_income.dta", keep(1 3) nogen
				drop incpc incMeanHH

				*merge county education
				merge m:1 county year using "${data_cleaned}USDA_education_county", keep(1 3) nogen

				*merge county life expectancy
				merge m:1 county year using "${data_cleaned}LE_county", keep(1 3) nogen

				*split five biggest counties up in three subgroups (with identical poverty + epsilon)
				local big5 48201,17031,6065,6037,4013
				expand 3
				bysort county year: gen n=_n
				keep if n==1 | (n>1 & inlist(county,`big5'))

				replace population=population/3 if inlist(county,`big5')
				* marginally adjust poverty rate to allow ranking
				replace poverty=poverty+0.0001*n if inlist(county,`big5') & n>1
				* create county subgroup list: copies 2 and 3 get county+.2 and
				* county+.3. The fractional code is a merge key later on, so it
				* is stored as double explicitly. This line has to stay last
				* because it changes the values the inlist tests look at.
				recast double county
				replace county=county+.1*n if inlist(county,`big5') & n>1
				drop n

				*Dorn's county adjustments
				replace county=12025 if county==12086
				replace county=51083 if county==51780
				replace county=8013 if county==8014

				collapse LE_mean noHS incMedianHH poverty (rawsum) population [aw=population], by(county year)

				*generate annual quantile
				foreach var of varlist poverty incMedianHH noHS LE_mean {
					gen quant_`var'=.
					forvalues x=1990(10)2010 {
						xtile quant_`var'`x'=`var' [aw=population] if year==`x', nq($quantiles)
						replace quant_`var'=quant_`var'`x' if year==`x'
						drop quant_`var'`x'
					}
				}

				keep county year quant_poverty quant_incMedianHH quant_noHS quant_LE_mean

				* NOTE(review): temp is written to the current working directory
				save temp, replace
				
				
				*----------------------Merge and expand by quantile type----------------------*
				* Attaches the county quantiles from temp to the county
				* mortality file and makes four copies of every row, one per
				* ranking indicator (qtype), each with its own quantile.

				use "${data_cleaned}mort_county", clear

				*split five biggest counties up in three subgroups (with average pop and death counts)
				local big5 48201,17031,6065,6037,4013
				expand 3
				bysort county year male agegroup19: gen n=_n
				keep if n==1 | (n>1 & inlist(county,`big5'))

				replace population=population/3 if inlist(county,`big5')
				replace deaths=deaths/3 if inlist(county,`big5')
				* marginally adjust poverty rate to allow ranking
				replace poverty=poverty+0.0001*n if inlist(county,`big5') & n>1
				* create county subgroup list. The fractional codes must be
				* bit-identical to the ones stored in temp, otherwise
				* keep(3) below silently drops subgroups 2 and 3. Forcing
				* double makes the match independent of the storage type
				* county arrives with.
				recast double county
				replace county=county+.1*n if inlist(county,`big5') & n>1
				drop n

				merge m:1 county year using temp, keep(3) nogen

				* one copy per quantile type
				expand 4

				bysort male county year agegroup19: gen n=_n
				tab n

				gen quantile=.
				gen qtype=""

				local n=1
				local des_poverty "Poverty quantile"
				local des_incMedianHH "Median income quantile"
				local des_noHS "HS dropout quantile"
				local des_LE_mean "LE quantile"

				* noHS and LE_mean are not in this file; empty placeholders are
				* created only so that the varlist loop below accepts the names
				gen noHS=.
				gen LE_mean=.
				foreach var of varlist poverty incMedianHH noHS LE_mean {
					replace quantile=quant_`var' if n==`n'
					replace qtype="`des_`var''"  if n==`n'
					local n=`n'+1
				}
				drop quant_* noHS LE_mean
 
 
				*----------------------Merge and expand by q1990----------------------*
				* Duplicates every row: q1990==0 keeps the quantile of the
				* row's own year, q1990==1 replaces it with the county's 1990
				* quantile. The data are then aggregated to county groups
				* (quantiles) and labelled.

				*assign 1990 quantile
				expand 2

				bysort male county year agegroup19 qtype: gen q1990=_n-1
				gen r1990=quantile if year==1990
				bysort county qtype: egen R1990=min(r1990)
				replace quantile=R1990 if q1990==1
				drop r1990 R1990

				* population-weighted means of income and poverty; population
				* and deaths are summed without weights
				collapse inc* poverty (rawsum) population deaths [aw=population], by(quantile year q1990 qtype male agegroup19)

				* express the quantile on a 0-100 scale
				replace quantile=quantile*(100/$quantiles)

				* label variables
				label var year "Census year"
				label var population "Population on Census Day (April 1st)"
				label var deaths "Deaths during 3 years following Census Day"
				label var agegroup19 "Age on Census Day"
				* the duplicated entry for code 62 has been removed
				label define agegroup19 0 "0" 2 "1-4" 7 "5-9" 12 "10-14" 17 "15-19" ///
					22 "20-24" 27 "25-29" 32 "30-34" 37 "35-39" 42 "40-44" ///
					47 "45-49" 52 "50-54" 57 "55-59" 62 "60-64" 67 "65-69" ///
					72 "70-74" 77 "75-79" 82 "80-84" 90 "85 and above", replace
				label values agegroup19 agegroup19
				label var quantile "Quantile (or ranking) of county group"
				label var qtype "Indicator used to create quantiles / rank counties"
				label var q1990 "County groups fixed in 1990"
				label define q1990 0 "County groups reordered each year" 1 "County groups fixed in 1990", replace
				label values q1990 q1990
				label var incMedianHH "Median household income in county group"
				label var incpc "Income per capita in county group"
				label define male 0 "females" 1 "males", replace
				label values male male
	}
				save "${data_cleaned}mort_quantiles_county", replace
						
				

	**************************************************************************************************
	**************************************************************************************************
	*--------------------- Create Life Expectancy by County Groups ---------------------*
	**************************************************************************************************
	**************************************************************************************************
	

	*********************************************************************
	*--------------------------- Load Data -----------------------------*
	*********************************************************************
			
		************************************
		*-----Prepare Mortality & Pop------*
		************************************
	{

	*********************************************************************
	*------------------- Define Globals & Programs ---------------------*
	*********************************************************************
	{

	* Fixed-column read commands for the raw death records:
	* infix2002 is applied to files up to 2002, infix2003 to files from 2003 on.
	* In the later layout state and county of residence are read as strings.
	global infix2002 "infix month 55-56 ageunit 64 agenr 65-66 str sex 59 state_res 124-125 county_res 126-128 "
	global infix2003 "infix month 65-66 ageunit 70 agenr 71-73 str sex 69  str state_res 29-30 str county_res 35-37 "
					
		
	*--------------------------- clean_VS ------------------------------*
	* clean_VS takes no arguments. It expects the raw death records in memory
	* (variables month ageunit agenr sex state_res county_res as read by
	* infix2002 or infix2003, plus a constant year variable) and leaves one
	* row per age x male x county holding the number of deaths and the state
	* code. This definition replaces any clean_VS defined earlier.

			capture program drop clean_VS
			program define clean_VS

				* The bare reference to year uses its value in the first
				* observation; it selects the record layout.
				if year<=2002 {
					* ageunit 0: agenr is the age; ageunit 1: agenr plus 100;
					* ageunit 2 to 8: set to age 0 (presumably sub-year units,
					* TODO confirm); any other unit leaves age missing
					gen age=cond(ageunit==0, agenr, cond(ageunit==1, agenr+100, cond(ageunit>=2 & ageunit<9, 0, .)))

					gen male=(sex=="1")  if sex!="."

					rename state_res state
				}

				if year>=2003 {
					* ageunit 1: agenr is the age; units 2 to 8: set to age 0
					gen age=cond(ageunit==1, agenr, cond(ageunit>1 & ageunit<9, 0, .))
					replace age=. if age==999

					gen male=(sex=="M")

					* translate the postal abbreviation into the numeric
					* state code; the two lists are matched by position
					local postal AL AK AZ AR CA CO CT DE DC FL GA HI ID IL IN IA KS KY LA ME MD MA MI MN MS MO MT NE NV NH NJ NM NY NC ND OH OK OR PA RI SC SD TN TX UT VT VA WA WV WI WY
					local fips 1 2 4 5 6 8 9 10 11 12 13 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56
					local nstates : word count `postal'
					gen state=.
					forvalues i=1/`nstates' {
						local ab : word `i' of `postal'
						local code : word `i' of `fips'
						quietly replace state=`code' if state_res=="`ab'"
					}

					destring county_res, replace
				}

				* five-digit county code: state code times 1000 plus county code
				gen county=state*1000+county_res

				* report and remove records whose state code is 0, above 56 or missing
				display as text "foreign residents"
				count if state==0 | state>56
				drop if state==0 | state>56

				* censusday is created but not kept: the collapse below does
				* not group by it
				gen censusday=(month>=4)

				* one record is one death
				gen deaths=1

				collapse (mean) state (sum) deaths, by(age male county)

				cap label define lab_state 001 "Alabama" 002 "Alaska" 004 "Arizona" 005 "Arkansas" ///
					006 "California" 008 "Colorado" 009 "Connecticut" 010 "Delaware" ///
					011 "District of Columbia" 012 "Florida" 013 "Georgia" 015 "Hawaii" ///
					016 "Idaho" 017 "Illinois" 018 "Indiana" 019 "Iowa" 020 "Kansas" ///
					021 "Kentucky" 022 "Louisiana" 023 "Maine" 024 "Maryland" ///
					025 "Massachusetts" 026 "Michigan" 027 "Minnesota" 028 "Mississippi" ///
					029 "Missouri" 030 "Montana" 031 "Nebraska" 032 "Nevada" ///
					033 "New Hampshire" 034 "New Jersey" 035 "New Mexico" 036 "New York" ///
					037 "North Carolina" 038 "North Dakota" 039 "Ohio" 040 "Oklahoma" ///
					041 "Oregon" 042 "Pennsylvania" 044 "Rhode Island" 045 "South Carolina" ///
					046 "South Dakota" 047 "Tennessee" 048 "Texas" 049 "Utah" 050 "Vermont" ///
					051 "Virginia" 053 "Washington" 054 "West Virginia" 055 "Wisconsin" ///
					056 "Wyoming"
				label values state lab_state

			end // clean_VS

	}
		
		*********************************
		*------Load VS Mortality--------*
		*********************************
		* Reads the raw death files of the three census years only (1990, 2000,
		* 2010; the follow-up years are not used here), cleans each one with
		* clean_VS and saves it as VS_LE_YYYY_county.
		* File paths are quoted so that folders containing blanks also work.

		global load_1990 "${VS_orig}Mort1990/MORT1990.PT2"
		global load_2000 "${VS_orig}Mort2000/MORT00US.PT2"
		global load_2010 "${VS_orig}Mort2010/MULT2010.USPART2.EXACTDOD"

		*Load years
		forvalues x=1990(10)2010 {
			* the record layout changes with the 2003 file
			if `x'<=2002 {
				${infix2002} using "${load_`x'}", clear
			}
			else {
				${infix2003} using "${load_`x'}", clear
			}

			* clean_VS reads year to choose the layout branch
			gen year=`x'
			clean_VS
			* the collapse inside clean_VS drops year, so add it again
			gen year=`x'
			save "${data_cleaned}VS_LE_`x'_county", replace
		}
					

		*********************************
		*-------Merge Mortality---------*
		*********************************
		
		* Stack the three cleaned census-year mortality files.
		use ${data_cleaned}VS_LE_1990_county, clear
		foreach yr in 2000 2010 {
			append using ${data_cleaned}VS_LE_`yr'_county
		}

		* Recode single years of age to the lower bound of the abridged life-table groups:
		* 0, 1-4, five-year bands 5-9 up to 80-84, and 85-130 as the top group.
		* Ages outside 0-130 and missing ages keep a missing agegroup.
		gen agegroup = .
		replace agegroup = 0 if inrange(age,0,0)
		replace agegroup = 1 if inrange(age,1,4)
		forvalues lo = 5(5)80 {
			replace agegroup = `lo' if inrange(age,`lo',`lo'+4)
		}
		replace agegroup = 85 if inrange(age,85,130)

		* Total deaths per sex x county x age group x year.
		collapse (sum) death, by(male county agegroup year)
		save ${data_cleaned}VS_LE_county, replace
					
		************************************
		*-------Prepare population---------*
		************************************
					
		* Recode the census age-group variable and store a population file ready
		* to be merged with the mortality counts on county, male, year and agegroup.
		use  ${data_cleaned}countypop1990_2010, clear

		* Work on the final variable name directly instead of relying on Stata
		* resolving the abbreviation agegroup to agegroup19.
		rename agegroup19 agegroup

		* Code 2 becomes 1, every code above 2 is shifted down by 2, and anything
		* above 85 is capped at 85.
		* NOTE(review): presumably this aligns the census coding with the
		* 0, 1, 5, ..., 85 coding of the mortality file - confirm against the
		* code that builds countypop1990_2010.
		* Numeric missing values compare greater than any number in Stata, so the
		* two "greater than" conditions exclude them explicitly; otherwise a missing
		* age group would be recoded to 85.
		replace agegroup = agegroup - 1 if agegroup == 2
		replace agegroup = agegroup - 2 if agegroup > 2 & !missing(agegroup)
		replace agegroup = 85 if agegroup > 85 & !missing(agegroup)

		sort county year male
		save "${data_cleaned}countypop1990_2010_LE", replace
					

		*********************************
		*-------Merge VS & POP ---------*
		*********************************
		
		* Attach death counts to the population cells, one row per
		* county x sex x year x age group. No keep() option is given, so
		* unmatched rows from either file stay in the data.
		use "${data_cleaned}countypop1990_2010_LE", clear
		merge 1:1 county male year agegroup using "${data_cleaned}VS_LE_county"
		drop _merge
		save "${data_cleaned}LE_county_quant", replace
						
	}
		*********************************************************************
		*---------------Generate quantile e(x)-----------------*
		*********************************************************************

		******************************
		***** Merge county characteristics for ranking *****
		******************************

			
		{
					* Number of county groups used for the ranking.
					global quantiles 20

					* Start from total population per county and year.
					use ${data_cleaned}countypop1990_2010, clear
					collapse (rawsum) population , by(county year)

					* Add county characteristics; counties without a match are kept (keep(1 3)).

					* poverty rates
					sort county year
					merge m:1 county year using ${data_cleaned}county_poverty.dta, keep(1 3) nogen

					* median income
					sort county year
					merge m:1 county year using ${data_cleaned}county_income.dta, keep(1 3) nogen
					drop incMeanHH

					* education
					sort county year
					merge m:1 county year using ${data_cleaned}USDA_education_county, keep(1 3) nogen

					* life expectancy, renamed with an IHME_ prefix
					sort county year
					merge m:1 county year using ${data_cleaned}LE_county, keep(1 3) nogen
					rename LEmale IHME_LEmale
					rename LEfemale IHME_LEfemale

					* Split the five biggest counties into three subgroups each, with one
					* third of the population and an (almost) identical poverty rate.
					local big5 48201,17031,6065,6037,4013
					expand 3 if inlist(county,`big5')
					bysort county year: gen n=_n

					replace pop=pop/3 if inlist(county,`big5')
					replace poverty=poverty+0.0001*n if inlist(county,`big5') & n>1   /*marginally adjust poverty rate to allow ranking*/
					replace county=county+.1*n if inlist(county,`big5') & n>1 		/*create county subgroup list*/
					drop n

					* Recode three county codes onto another code so they are pooled below.
					* NOTE(review): presumably county codes that changed over the period - confirm.
					replace county=12025 if county==12086
					replace county=51083 if county==51780
					replace county=8013 if county==8014

					* Population-weighted means of the characteristics and total population
					* per county and year.
					collapse poverty incMedianHH incpc noHS LE_mean I* state (rawsum) population [aw=population], by(county year)
									
					******************************
					***** Generate quantiles for age groups *****
					******************************
		
					* Population-weighted quantile of each county characteristic,
					* computed separately within each census year.
					foreach var of varlist poverty incMedianHH incpc noHS LE_mean {
						gen quant_`var'=.
						foreach x of numlist 1990 2000 2010 {
							tempvar q_yr
							xtile `q_yr'=`var' [aw=pop]  if year==`x', nq($quantiles)
							replace quant_`var'=`q_yr' if year==`x'
							drop `q_yr'
						}
					}

					* Only the poverty ranking is used from here on.
					keep county year quant_poverty poverty I*

					* Version 1 (q1990==0): counties re-ranked every year.
					save temp_LE, replace

					* Version 2 (q1990==1): every county keeps the quantile of the row one
					* (for 2000) or two (for 2010) positions earlier within the county.
					* NOTE(review): this is the 1990 value only if each county has
					* 1990, 2000 and 2010 rows - confirm.
					forvalues lag=1/2 {
						local yr=1990+10*`lag'
						bysort county (year): replace quant_poverty=quant_poverty[_n-`lag'] if year==`yr'
					}
					gen q1990=1

					* Stack both versions.
					append using temp_LE
					replace q1990=0 if q1990==.

					* Build the sex x age-group skeleton: 2 sexes and 19 age groups
					* coded 0, 1, 5, 10, ..., 85.
					expand 2
					bysort year county q1990: gen male=_n-1

					expand 19
					bysort year county q1990 male: gen agegroup=cond(_n<=2, _n-1, (_n-2)*5)

					save temp_LE, replace
						
			
				******************************
				***** Merge county mortality and quantiles *****
				******************************
					
				* County-level population and deaths by year, sex and age group.
				use ${data_cleaned}LE_county_quant, clear

				* Same county recodes as applied to the ranking data.
				replace county=12025 if county==12086
				replace county=51083 if county==51780
				replace county=8013 if county==8014

				collapse (sum) pop death , by(county year male agegroup)

				* Split the five biggest counties into three subgroups, each with one
				* third of the population and of the deaths, matching the subgroup
				* codes created in the ranking data.
				local big5 48201,17031,6065,6037,4013
				expand 3 if inlist(county,`big5')
				bysort county year male agegroup: gen n=_n

				replace pop=pop/3 if inlist(county,`big5')
				replace deaths=deaths/3 if inlist(county,`big5')
				replace county=county+.1*n if inlist(county,`big5') & n>1 		/*create county subgroup list*/
				drop n

				* Attach the county quantiles; temp_LE has one row per q1990 version,
				* hence 1:m. Only matched rows are kept.
				merge 1:m county year male agegroup using temp_LE, keep(3) nogen
				drop if agegroup==. | quant_poverty==.

				rename quant_poverty quantile

				* Aggregate to quantile cells: total population and deaths, plus
				* population-weighted means of the I* variables.
				collapse I* (rawsum) pop death [aw=pop], by(year agegroup male quantile q1990)
					
						***********************************************
						*---------generate LE  ---------------*
						***********************************************
		
						* Abridged life table for every quantile x year x sex x q1990 cell.
						* Variables created, in this order (a later drop uses the range
						* minage-e_lowerCI, so minage has to stay the first one):
						*   minage  lower bound of the age interval
						*   M       age-specific death rate
						*   n       length of the age interval in years
						*   a       fraction of the interval lived by those who die in it
						*   q, p    conditional probability of dying in / surviving the interval
						*   I       survivors at the start of the interval (starts at 100000)
						*   d       life-table deaths in the interval
						*   L       life years lived in the interval
						*   T       life years lived in this and all later intervals
						*   e       life expectancy at the start of the interval
						gen minage=age
						sort year male age

						*age-specific death rate
						gen M=death/pop

						*length of the 19 age intervals used in this abridged life table
						*(for the open 85+ interval q and L are overridden below, so n=11 does not enter them)
						gen n=1 if minage==0
						replace n=4 if minage==1
						replace n=5 if n==. & minage<85
						replace n=11 if minage==85

						*a = fraction of the age interval lived by those in the cohort population who die in the interval.
						*Values of a can be derived from reference populations or life tables using the methods outlined by Chiang, 1984.
						*Here a is set to 0.5 for all age groups except <1, where 0.1 is used.
						*Ref: Chiang CL. The Life Table and its Applications. Malabar (FL): Robert E Krieger Publ Co, 1984.
						gen a=cond(minage==0, .1, .5)

						*q = conditional probability that an individual who has survived to the start of the age interval will die in it.
						*Everybody dies in the open-ended last interval, so q is 1 there.
						*Ref: Chiang CL. The Life Table and its Applications. Malabar (FL): Robert E Krieger Publ Co, 1984.
						gen q=cond(minage==85, 1, (n*M)/(1+n*(1-a)*M))

						*probability of survival
						gen p=1-q

						*life table cohort population / reference population
						*(replace runs row by row, so each row uses the survivors already computed for the previous age)
						gen I=100000 if minage==0
						bysort  quantile year male q1990 (age): replace I=I[_n-1]*p[_n-1] if minage>0

						*deaths in reference population; all remaining survivors die in the last interval
						bysort  quantile year male q1990 (age): gen d=I-I[_n+1]
						replace d=I if minage==85

						*life years lived in reference population in interval; survivors divided by the death rate for 85+
						bysort  quantile year male q1990 (age): gen L=n*(I[_n+1]+(a*d))
						replace L=I/M if minage==85

						*Cumulative number of life years lived in the age interval and all subsequent intervals (until last person is dead).
						*Accumulated from the oldest age downwards. The by prefix keeps the running sum
						*inside one cell: a cell without an 85+ row gets a missing T instead of
						*continuing the sum of the preceding cell.
						gsort  quantile year male q1990 -minage
						gen T=L if minage==85
						by quantile year male q1990: replace T=L+T[_n-1] if minage<85
						sort quantile year male q1990 minage 

						*life expectancy
						gen e=T/I
									
				
					***********************************************
					*---------Confidence Interval  ---------------*
					***********************************************
					
						* Standard error and 95% confidence bounds of e, after
						* Chiang CL (1978) Life Table and Mortality Analysis, World Health Organisation.
						* e_lowerCI has to remain the last variable created here: a later
						* drop removes the variable range minage-e_lowerCI.
						local cell quantile year male q1990

						* Variance of the conditional probability of death (Chiang's method I).
						gen var_q=(q^2*(1-q))/death

						* Contribution of each interval to Var(e): I_x^2 [(1-a_x)n_x + e_x+1]^2 var(q_x).
						* The 85+ interval contributes zero.
						bysort `cell' (age): gen var_temp1=cond(minage==85, 0, I^2*( (1-a)*n+e[_n+1] )^2*var_q)

						* Running sum of the contributions from the oldest age downwards, within cell.
						gsort `cell' -age
						gen sum_var_temp1=0 if minage==85
						by `cell': replace sum_var_temp1= var_temp1+ sum_var_temp1[_n-1] if minage!=85
						sort quantile year male age q1990

						* Var(e_x) and SE(e_x)
						gen var_e=sum_var_temp1/(I^2)
						gen SE_e=var_e^.5

						* Bounds at 1.96 standard errors either side of e.
						gen e_upperCI=e+1.96*SE_e
						gen e_lowerCI=e-1.96*SE_e
		
		}
		
		************
		*Choose e^x*
		************
		
		{

					* Keep life expectancy at birth only.
					keep if minage==0
					drop agegroup deaths

					* Put female and male values side by side on the female row of each
					* quantile x year x q1990 cell; the male row follows the female row
					* once the cell is sorted by male.
					bysort quantile year q1990 (male): gen LEfemale=e if male==0
					by quantile year q1990: gen SEfemale=SE_e if male==0
					by quantile year q1990: gen LEmale=e[_n+1] if male==0
					by quantile year q1990: gen SEmale=SE_e[_n+1] if male==0

					* One row per cell; drop the life-table work variables.
					keep if male==0
					drop minage-e_lowerCI male pop

					* Rescale the group number to a percentile scale.
					replace quantile=quantile*(100/$quantiles)

					* Labels
					label define q1990 0 "County groups reordered each year" 1 "County groups fixed in 1990"
					label values q1990 q1990

					label var quantile "Quantile (or ranking) of county group"
					label var q1990 "County groups fixed in 1990"
					label var LEfemale "Female life expectancy at birth"
					label var LEmale "Male life expectancy at birth"
					label var SEfemale "Standard deviation of LEfemale"
					label var SEmale "Standard deviation of LEmale"
					label var IHME_LEfemale "Female life expectancy at birth, IHME data"
					label var IHME_LEmale "Male life expectancy at birth, IHME data"

		}

			save ${data_cleaned}LE_quantile, replace
		
		
